# -*- coding: UTF-8 -*-
from pyspark.sql import SparkSession

if __name__ == '__main__':
    spark = SparkSession.builder.master("local").appName("demo").getOrCreate()

    # **********begin**********#
    df = spark.read.option("header", True).option("delimiter", "\t").csv("/root/data.csv")
    df.createTempView("data")
    spark.sql("""
    select regexp_replace(TRIP_ID,'\\\W+','') as TRIP_ID ,
        regexp_replace(CALL_TYPE,'\\\W+','') as CALL_TYPE ,
        regexp_replace(ORIGIN_CALL,'\\\W+','') as ORIGIN_CALL ,
        regexp_replace(TAXI_ID,'\\\W+','') as TAXI_ID ,
        regexp_replace(ORIGIN_STAND,'\\\W+','') as ORIGIN_STAND ,
        regexp_replace(TIMESTAMP,'\\\W+','') as TIMESTAMP ,
        regexp_replace(POLYLINE,'\\\W+','') as POLYLINE
    from data
    """).show()
    # **********end**********#
    spark.stop()
